# Replication file
# Election noise models in "Fixed Effects and Post-Treatment Bias in Legacy Studies"

###
### first run: 3 degrees
###

rm(list=ls())
set.seed(12435)
library(geosphere); library(geostatsp); library(mvnfast)

# load data
load("elections_fes.RData")
elect_data_full <- elect_data

# calculate distances based on long and lat in data frame
elect_distances_full <- distm(elect_data_full[,c('long','lat')], fun=distGeo)

# rescale distances from m to km
elect_distances_full <- elect_distances_full/1000

# calculate matern variance covariance matrix, 3 degrees
matern_covar_full <- matern(elect_distances_full, param = c(range=3, shape=1, variance=1))

# generate noisy distance using matern covariance
noisy_dist_full <- rmvn(n = 1000, mu = rep(0, nrow(matern_covar_full)), sigma = matern_covar_full)

# get baseline p values (g estimator, not bootstrapped)
# full sample
mod00_a <- lm(AfDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
afd_pval_full <- summary(mod00_b)$coefficients[2,4]

mod01_e <- lm(AfDNPDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
afdnpd_pval_full <- summary(mod01_f)$coefficients[2,4]

# 70km subsample
elect_data70 <- subset(elect_data_full, distance2<7)
mod00_a <- lm(AfDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
afd_pval_70 <- summary(mod00_b)$coefficients[2,4]

mod01_e <- lm(AfDNPDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
afdnpd_pval_70 <- summary(mod01_f)$coefficients[2,4]

# transpose the noise matrix
# plug into real data, run models and store pvalues
noisy_dist_full <- t(noisy_dist_full)
pvals_afd_full <- pvals_afdnpd_full <- pvals_afd_70 <- pvals_afdnpd_70 <- NULL
for (i in 1:1000){
  elect_data_full$distn <- noisy_dist_full[,i]
  elect_data70 <- subset(elect_data_full, distance2<7)
  # full sample
  mod00_a <- lm(AfDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
  mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                  - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
  pvals_afd_full <- c(pvals_afd_full, summary(mod00_b)$coefficients[2,4])
  mod01_e <- lm(AfDNPDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
  mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                  - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
  pvals_afdnpd_full <- c(pvals_afdnpd_full, summary(mod01_f)$coefficients[2,4])
  # 70km subsample
  mod00_a <- lm(AfDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
  mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                  - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
  pvals_afd_70 <- c(pvals_afd_70, summary(mod00_b)$coefficients[2,4])
  mod01_e <- lm(AfDNPDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
  mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                  - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
  pvals_afdnpd_70 <- c(pvals_afdnpd_70, summary(mod01_f)$coefficients[2,4])
}

### 
### FIGURE SM6.1
###
pdf("noise_elect_afd_full_3deg.pdf", height=8, width=8)
hist(pvals_afd_full, breaks=100, main="", xlab="p-value")
abline(v=afd_pval_full, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afd_70_3deg.pdf", height=8, width=8)
hist(pvals_afd_70, breaks=100, main="", xlab="p-value")
abline(v=afd_pval_70, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afdnpd_full_3deg.pdf", height=8, width=8)
hist(pvals_afdnpd_full, breaks=100, main="", xlab="p-value")
abline(v=afdnpd_pval_full, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afdnpd_70_3deg.pdf", height=8, width=8)
hist(pvals_afdnpd_70, breaks=100, main="", xlab="p-value")
abline(v=afdnpd_pval_70, col="firebrick", lwd=2)
dev.off()

###
### TABLE SM6.1, part 1
###
table(pvals_afd_full<afd_pval_full)/1000
table(pvals_afd_70<afd_pval_70)/1000
table(pvals_afdnpd_full<afdnpd_pval_full)/1000
table(pvals_afdnpd_70<afdnpd_pval_70)/1000

### 3 degrees
# AFD full: 0%
# AFD 70km: 3.8%
# AFD NPD full: 0%
# AFD NPD 70km: 3.4%


###
### second run: 5 degrees
###

rm(list=ls())
set.seed(12435)
library(geosphere); library(geostatsp); library(mvnfast)

# load data
load("elections_fes.RData")
elect_data_full <- elect_data

# calculate distances based on long and lat in data frame
elect_distances_full <- distm(elect_data_full[,c('long','lat')], fun=distGeo)

# rescale distances from m to km
elect_distances_full <- elect_distances_full/1000

# calculate matern variance covariance matrix, 5 degrees
matern_covar_full <- matern(elect_distances_full, param = c(range=5, shape=1, variance=1))

# generate noisy distance using matern covariance
noisy_dist_full <- rmvn(n = 1000, mu = rep(0, nrow(matern_covar_full)), sigma = matern_covar_full)

# get baseline p values (g estimator, not bootstrapped)
# full sample
mod00_a <- lm(AfDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
afd_pval_full <- summary(mod00_b)$coefficients[2,4]

mod01_e <- lm(AfDNPDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
afdnpd_pval_full <- summary(mod01_f)$coefficients[2,4]

# 70km subsample
elect_data70 <- subset(elect_data_full, distance2<7)
mod00_a <- lm(AfDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
afd_pval_70 <- summary(mod00_b)$coefficients[2,4]

mod01_e <- lm(AfDNPDshare ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distance2 + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
afdnpd_pval_70 <- summary(mod01_f)$coefficients[2,4]

# transpose the noise matrix
# plug into real data, run models and store pvalues
noisy_dist_full <- t(noisy_dist_full)
pvals_afd_full <- pvals_afdnpd_full <- pvals_afd_70 <- pvals_afdnpd_70 <- NULL
for (i in 1:1000){
  elect_data_full$distn <- noisy_dist_full[,i]
  elect_data70 <- subset(elect_data_full, distance2<7)
  # full sample
  mod00_a <- lm(AfDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
  mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                  - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
  pvals_afd_full <- c(pvals_afd_full, summary(mod00_b)$coefficients[2,4])
  mod01_e <- lm(AfDNPDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data_full)
  mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                  - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data_full)
  pvals_afdnpd_full <- c(pvals_afdnpd_full, summary(mod01_f)$coefficients[2,4])
  # 70km subsample
  mod00_a <- lm(AfDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
  mod00_b <- lm(I(AfDshare - coef(mod00_a)["foreign_rate"]*foreign_rate 
                  - coef(mod00_a)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
  pvals_afd_70 <- c(pvals_afd_70, summary(mod00_b)$coefficients[2,4])
  mod01_e <- lm(AfDNPDshare ~ distn + vshare33 + unemp33 + pop25 + prop_juden + unemp_rate + foreign_rate + west, elect_data70)
  mod01_f <- lm(I(AfDNPDshare - coef(mod01_e)["foreign_rate"]*foreign_rate 
                  - coef(mod01_e)["unemp_rate"]*unemp_rate) ~ distn + vshare33 + unemp33 + pop25 + prop_juden, elect_data70)
  pvals_afdnpd_70 <- c(pvals_afdnpd_70, summary(mod01_f)$coefficients[2,4])
}


### 
### FIGURE SM6.2
###
pdf("noise_elect_afd_full_5deg.pdf", height=8, width=8)
hist(pvals_afd_full, breaks=100, main="", xlab="p-value")
abline(v=afd_pval_full, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afd_70_5deg.pdf", height=8, width=8)
hist(pvals_afd_70, breaks=100, main="", xlab="p-value")
abline(v=afd_pval_70, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afdnpd_full_5deg.pdf", height=8, width=8)
hist(pvals_afdnpd_full, breaks=100, main="", xlab="p-value")
abline(v=afdnpd_pval_full, col="firebrick", lwd=2)
dev.off()
pdf("noise_elect_afdnpd_70_5deg.pdf", height=8, width=8)
hist(pvals_afdnpd_70, breaks=100, main="", xlab="p-value")
abline(v=afdnpd_pval_70, col="firebrick", lwd=2)
dev.off()

###
### TABLE SM6.1, part 2
###
table(pvals_afd_full<afd_pval_full)/1000
table(pvals_afd_70<afd_pval_70)/1000
table(pvals_afdnpd_full<afdnpd_pval_full)/1000
table(pvals_afdnpd_70<afdnpd_pval_70)/1000

### 3 degrees
# AFD full: 0%
# AFD 70km: 3.8%
# AFD NPD full: 0%
# AFD NPD 70km: 3.4%

### 5 degrees
# AFD full: 0%
# AFD 70km: 11.2%
# AFD NPD full: 0%
# AFD NPD 70km: 11.1%
